Species-specific community analyses conducted to generate the data included in these analyses can be found in the annex.
Library complexity
Nonpareil estimate of the metagenomic complexity after removing host DNA.
all_data %>%
select(dataset,Extraction,nonpareil,Taxon) %>%
unique() %>%
group_by(Taxon,Extraction) %>%
summarise(value = sprintf("%.1f±%.1f", mean(nonpareil), sd(nonpareil))) %>%
pivot_wider(names_from = Extraction, values_from = value) %>%
tt(caption = "Mean and standard deviation of breadth of host genome coverage")
tinytable_9hpca6vkb9d0l8f4lw2x
Mean and standard deviation of breadth of host genome coverage
| Taxon |
DREX |
EHEX |
ZYMO |
| Amphibian |
0.8±0.1 |
0.8±0.1 |
0.9±0.1 |
| Bird |
1.0±0.0 |
0.8±0.4 |
0.9±0.1 |
| Control |
0.5±0.6 |
0.5±0.7 |
0.0±0.0 |
| Mammal |
0.8±0.1 |
0.7±0.3 |
0.8±0.2 |
| Reptile |
0.9±0.0 |
0.9±0.1 |
0.9±0.1 |
all_data %>%
select(dataset,Extraction,nonpareil,Taxon) %>%
unique() %>%
mutate(Taxon=factor(Taxon,levels=c("Amphibian","Reptile","Mammal","Bird","Control"))) %>%
mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
ggplot(aes(x=Extraction,y=nonpareil))+
geom_boxplot() +
facet_grid(. ~ Taxon, scales = "free") +
labs(y="Nonpareil completeness",x="Extraction method")

all_data %>%
select(dataset,Extraction,Sample,Species,nonpareil,Taxon) %>%
unique() %>%
mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(nonpareil ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
broom.mixed::tidy() %>%
tt()
tinytable_o1m91po2hkp3ez668rg9
| effect |
group |
term |
estimate |
std.error |
statistic |
df |
p.value |
| fixed |
NA |
(Intercept) |
0.885552853 |
0.03450229 |
25.6664948 |
40.89542 |
7.753071e-27 |
| fixed |
NA |
ExtractionDREX |
0.005536892 |
0.04336611 |
0.1276779 |
48.00020 |
8.989373e-01 |
| fixed |
NA |
ExtractionEHEX |
-0.112193866 |
0.04336611 |
-2.5871326 |
48.00020 |
1.276294e-02 |
| ran_pars |
Sample |
sd__(Intercept) |
0.059565487 |
NA |
NA |
NA |
NA |
| ran_pars |
Species |
sd__(Intercept) |
0.035030813 |
NA |
NA |
NA |
NA |
| ran_pars |
Residual |
sd__Observation |
0.150224598 |
NA |
NA |
NA |
NA |
Alpha diversity
Variance partitioning metrics are derived from community_analysis.Rmd.
alpha_data <- list.files(path = "results", pattern = "^alpha_.*\\.tsv$", full.names = TRUE) %>%
map_df(~ read_tsv(.)) %>%
left_join(all_data,by= join_by(dataset==dataset))
alpha_data %>%
pivot_longer(!c(dataset,Library,Species,Taxon,Sample,Extraction), names_to = "metric", values_to = "value") %>%
mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic"))) %>%
mutate(Taxon=factor(Taxon,levels=c("Amphibian","Reptile","Mammal"))) %>%
mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
ggplot(aes(x=Extraction,y=value))+
geom_boxplot() +
facet_grid(metric ~ Taxon, scales = "free")

Richness
alpha_data %>%
select(dataset,Extraction,Sample,Species,richness,Taxon) %>%
unique() %>%
mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
lmerTest::lmer(richness ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
broom.mixed::tidy() %>%
tt()
tinytable_pjvr89dadesyvmnjddm5
| effect |
group |
term |
estimate |
std.error |
statistic |
df |
p.value |
| fixed |
NA |
(Intercept) |
59.44444444 |
10.715517 |
5.5475108 |
10.12322 |
0.000234351 |
| fixed |
NA |
ExtractionDREX |
0.05555556 |
4.453352 |
0.0124750 |
34.97883 |
0.990117532 |
| fixed |
NA |
ExtractionEHEX |
1.38257472 |
4.545332 |
0.3041746 |
35.13134 |
0.762789322 |
| ran_pars |
Sample |
sd__(Intercept) |
16.00359332 |
NA |
NA |
NA |
NA |
| ran_pars |
Species |
sd__(Intercept) |
28.56742258 |
NA |
NA |
NA |
NA |
| ran_pars |
Residual |
sd__Observation |
13.36005511 |
NA |
NA |
NA |
NA |
Neutral
alpha_data %>%
select(dataset,Extraction,Sample,Species,neutral,Taxon) %>%
unique() %>%
mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
lmerTest::lmer(neutral ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
broom.mixed::tidy() %>%
tt()
tinytable_08bhf8uublqlvroqt3fp
| effect |
group |
term |
estimate |
std.error |
statistic |
df |
p.value |
| fixed |
NA |
(Intercept) |
29.5413181 |
5.139162 |
5.7482754 |
9.895197 |
0.000193227 |
| fixed |
NA |
ExtractionDREX |
-2.1102758 |
1.922343 |
-1.0977625 |
35.042971 |
0.279794105 |
| fixed |
NA |
ExtractionEHEX |
-0.3422373 |
1.962694 |
-0.1743712 |
35.152526 |
0.862574163 |
| ran_pars |
Sample |
sd__(Intercept) |
8.6429526 |
NA |
NA |
NA |
NA |
| ran_pars |
Species |
sd__(Intercept) |
13.5543072 |
NA |
NA |
NA |
NA |
| ran_pars |
Residual |
sd__Observation |
5.7670282 |
NA |
NA |
NA |
NA |
Phylogenetic
alpha_data %>%
select(dataset,Extraction,Sample,Species,phylogenetic,Taxon) %>%
unique() %>%
mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
lmerTest::lmer(phylogenetic ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
broom.mixed::tidy() %>%
tt()
tinytable_etsa3nifli19n6jldv5m
| effect |
group |
term |
estimate |
std.error |
statistic |
df |
p.value |
| fixed |
NA |
(Intercept) |
4.7721984 |
0.4324985 |
11.034024 |
9.653749 |
8.685116e-07 |
| fixed |
NA |
ExtractionDREX |
0.1521100 |
0.1403528 |
1.083769 |
35.015284 |
2.858737e-01 |
| fixed |
NA |
ExtractionEHEX |
0.1852709 |
0.1433723 |
1.292236 |
35.056809 |
2.047290e-01 |
| ran_pars |
Sample |
sd__(Intercept) |
1.2179889 |
NA |
NA |
NA |
NA |
| ran_pars |
Species |
sd__(Intercept) |
0.9236345 |
NA |
NA |
NA |
NA |
| ran_pars |
Residual |
sd__Observation |
0.4210584 |
NA |
NA |
NA |
NA |
Microbial complexity recovery
all_data %>%
select(dataset,Extraction,microbial_fraction,MAG_mapping_percentage,Taxon) %>%
mutate(damr=pmin(1,MAG_mapping_percentage/(microbial_fraction*100))) %>%
mutate(damr=ifelse(is.na(damr),0,damr)) %>%
unique() %>%
group_by(Taxon,Extraction) %>%
summarise(value = sprintf("%.1f±%.1f", mean(damr), sd(damr))) %>%
pivot_wider(names_from = Extraction, values_from = value) %>%
tt(caption = "Mean and standard deviation of breadth of host genome coverage")
tinytable_scajeu357qawp1ypxx2g
Mean and standard deviation of breadth of host genome coverage
| Taxon |
DREX |
EHEX |
ZYMO |
| Amphibian |
0.8±0.3 |
0.8±0.3 |
0.8±0.4 |
| Bird |
0.5±0.4 |
0.6±0.4 |
0.6±0.4 |
| Control |
1.0±0.0 |
1.0±0.0 |
1.0±0.0 |
| Mammal |
0.9±0.1 |
0.8±0.3 |
0.8±0.1 |
| Reptile |
1.0±0.0 |
1.0±0.0 |
0.9±0.1 |
all_data %>%
select(dataset,Extraction,microbial_fraction,MAG_mapping_percentage,Taxon) %>%
mutate(damr=pmin(1,MAG_mapping_percentage/(microbial_fraction*100))) %>%
mutate(damr=ifelse(is.na(damr),0,damr)) %>%
unique() %>%
mutate(Taxon=factor(Taxon,levels=c("Amphibian","Reptile","Mammal","Bird","Control"))) %>%
mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
ggplot(aes(x=Extraction,y=damr))+
geom_boxplot() +
facet_grid(. ~ Taxon, scales = "free") +
labs(y="Domain-adjusted mapping rate",x="Extraction method")

all_data %>%
select(dataset,Extraction,microbial_fraction,MAG_mapping_percentage,Taxon, Sample, Species) %>%
mutate(damr=pmin(1,MAG_mapping_percentage/(microbial_fraction*100))) %>%
mutate(damr=ifelse(is.na(damr),0,damr)) %>%
unique() %>%
mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(damr ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
broom.mixed::tidy() %>%
tt()
tinytable_vkbc5i6m2y6f82drwgac
| effect |
group |
term |
estimate |
std.error |
statistic |
df |
p.value |
| fixed |
NA |
(Intercept) |
0.78271978 |
0.06660971 |
11.7508366 |
15.45687 |
4.117676e-09 |
| fixed |
NA |
ExtractionDREX |
0.03348421 |
0.03977983 |
0.8417385 |
130.19072 |
4.014779e-01 |
| fixed |
NA |
ExtractionEHEX |
0.02228400 |
0.03999367 |
0.5571881 |
130.24817 |
5.783552e-01 |
| ran_pars |
Sample |
sd__(Intercept) |
0.12024342 |
NA |
NA |
NA |
NA |
| ran_pars |
Species |
sd__(Intercept) |
0.19047224 |
NA |
NA |
NA |
NA |
| ran_pars |
Residual |
sd__Observation |
0.20283813 |
NA |
NA |
NA |
NA |
Variance partitioning
Variance partitioning metrics are derived from community_analysis.Rmd.
variance_data <- list.files(path = "results", pattern = "^var_.*\\.tsv$", full.names = TRUE) %>%
map_df(~ read_tsv(.))
variance_data %>% summarise(mean=mean(r2),sd=sd(r2))
# A tibble: 1 × 2
mean sd
<dbl> <dbl>
1 0.102 0.0768
variance_data %>%
left_join(sample_metadata %>% select(Species,Taxon) %>% unique(),by=join_by(species==Species)) %>%
mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic"))) %>%
mutate(Taxon=factor(Taxon,levels=c("Amphibian","Reptile","Mammal"))) %>%
ggplot(aes(x=Taxon,y=r2)) +
geom_boxplot()+
ylim(0,1)+
facet_grid(. ~ metric, scales = "free")

variance_data %>%
group_by(metric) %>%
summarise(mean=mean(r2),sd=sd(r2)) %>%
tt()
tinytable_6v6g91e1qhtf8g46454r
| metric |
mean |
sd |
| neutral |
0.06201935 |
0.04744380 |
| phylogenetic |
0.07381277 |
0.06884687 |
| richness |
0.16970818 |
0.06626388 |